********************************************************************************
*** Analysis APPENDIX F: Comparing characteristics of peers in ***
*** high school vs the military ***
********************************************************************************

********************************************************************************
*** Part A: Creating high school data
********************************************************************************
*** This part creates variables such as "school groups", 
*** "share high SES in groups" in last year of high school
*** In order to compare group characteristics in high school vs the military

* --- Session setup -----------------------------------------------------------
* Start from empty memory, work from the project folder and open a fresh
* plain-text log (any log left open by an earlier run is closed first).
clear all

cd "/home/almdan/EoC/analysis/revising_citizenship/POBE_final/"

cap log close

log using "log/POBE_create_schooldata_for_appendix_F", text replace

set more off

* --- Load the school data ----------------------------------------------------
* Cleaned and harmonized school data with background variables:
*   - high school data from the Education register
*   - background variables from the RTB and LISA registers
*   - parental background matched through a file linking parents and children
* All registers were obtained from Statistics Sweden (SCB).
cd "/home/almdan/EoC/build/output/"

use "EOC_diversity", clear

* Return to the project folder so relative graph/output/log paths resolve there.
cd "/home/almdan/EoC/analysis/revising_citizenship/POBE_final/"
********************************************************************************
** Analysis on schools between 2001 and 2005. Last year in high school ***

*** KEY VARIABLES ***
*** avgar = graduation year (education register)
*** utbildning = type of education (education register)
*** lopnr_skolkod = code for school (education register)
*** Sun2000Niva_old_YEAR = education level (LISA register)

* Sample used in this section: graduation years 2001-2005.
local gradyears "avgar>2000 & avgar<2006"

* A school group is one graduation year x type of education x school code cell.
sort avgar utbildning lopnr_skolkod

egen group_school= group(avgar utbildning lopnr_skolkod) if `gradyears'

* Number of students per group; stays missing where no group id was assigned.
* bysort leaves the data sorted by group_school.
bysort group_school: gen group_school_size = _N if `gradyears' & group_school!=.

* Flag groups with fewer than 10 students (missing when the size is missing).
gen smallgroup = group_school_size<10 if group_school_size<.

* Options shared by the two group-size histograms below.
local sizehist frac lcolor(black) lpattern("l") xtitle("Size of Groups") scheme(s1mono) blw(*.4) title("High School (2001-2005)") bcolor(gs11) blcolor(black)

* Group-size distribution, small groups excluded
hist group_school_size if `gradyears' & smallgroup==0, `sizehist'
graph save "graphs/diversity/school/hist_group_size", replace
graph export "graphs/diversity/school/hist_group_size.pdf", replace

* Group-size distribution, all groups
hist group_school_size if `gradyears', `sizehist'
graph save "graphs/diversity/school/hist_group_size_nolimit", replace
graph export "graphs/diversity/school/hist_group_size_nolimit.pdf", replace

** share foreign in groups **
* Leave-one-out share of foreign-background peers: the group total of foreign
* minus the student's own value, divided by (group size - 1).
* Requires the data to be sorted by group_school (by: is used, not bysort).
local gradyears "avgar>2000 & avgar<2006"

* total() is the documented egen function; sum() is its older name.
* The !missing(group_school) guard matches the simple-mean command below and
* keeps graduates without a group id from being pooled into one by-group.
by group_school: egen group_school_foreign = total(foreign) if `gradyears' & !missing(group_school)
replace group_school_foreign = (group_school_foreign - foreign)/(group_school_size  - 1) if `gradyears'
* NOTE(review): total() counts a missing foreign as 0 while the denominator is
* the full group size - confirm foreign is never missing in this sample.
* Groups of size 1 divide by zero and therefore end up missing.

sum group_school_size, detail
sum group_school_foreign, detail

* Peer exposure separately for students without / with foreign background
sum group_school_foreign if foreign==0, detail
sum group_school_foreign if foreign==1, detail

hist group_school_foreign if `gradyears' & smallgroup==0, frac lcolor(black) lpattern("l") xtitle("Leave out mean foreign background") scheme(s1mono)
graph save "graphs/diversity/school/hist_group_foreign", replace
graph export "graphs/diversity/school/hist_group_foreign.pdf", replace

*** Simple group mean (instead of leave one out). ***
by group_school: egen group_school_foreign_mean = mean(foreign) if `gradyears' & !missing(group_school)

sum group_school_foreign_mean, detail

sum group_school_foreign_mean if foreign==0, detail
sum group_school_foreign_mean if foreign==1, detail

hist group_school_foreign_mean if `gradyears' & smallgroup==0, frac lcolor(black) lpattern("l") xtitle("Mean foreign background") scheme(s1mono)
graph save "graphs/diversity/school/hist_group_foreign_mean", replace
graph export "graphs/diversity/school/hist_group_foreign_mean.pdf", replace

********************************************************************************
*** Father's education ***
********************************************************************************
* Build the list of yearly father's-education-level variables once, so that
* destring and rowmax are guaranteed to operate on the same set.
* NOTE(review): 1992 is not in the original list (1990 1991 1993-2005) and is
* therefore left out here as well - confirm whether
* Father_Sun2000Niva_old_1992 exists and should be included.
local fath_educ_vars
foreach yr of numlist 1990 1991 1993/2005 {
    local fath_educ_vars `fath_educ_vars' Father_Sun2000Niva_old_`yr'
}

* force: any non-numeric content becomes missing
destring `fath_educ_vars', replace force

* Father's education = highest level observed in any of the listed years
egen fathers_educ=rowmax(`fath_educ_vars')

tab fathers_educ

* High education = level above 3; missing when father's education is missing
gen fathers_high_educ=(fathers_educ>3) if  !missing(fathers_educ)

** share high educ fathers in groups **
local gradyears "avgar>2000 & avgar<2006"

* Leave-one-out share: group total minus own value over (group size - 1).
* Requires the data to be sorted by group_school.
* The !missing(group_school) guard matches the simple-mean command below.
by group_school: egen group_school_higheduc = total(fathers_high_educ) if `gradyears' & !missing(group_school)
replace group_school_higheduc = (group_school_higheduc - fathers_high_educ)/(group_school_size  - 1) if `gradyears'
* NOTE(review): total() counts missing fathers_high_educ as 0 while the
* denominator is the full group size, so members with unknown father's
* education enter as "not high" - confirm this is intended.

sum group_school_higheduc, detail

sum group_school_higheduc if fathers_high_educ==0, detail
sum group_school_higheduc if fathers_high_educ==1, detail

* Axis title: stray trailing apostrophe removed
hist group_school_higheduc if `gradyears' & smallgroup==0, frac lcolor(black) lpattern("l") xtitle("Leave out mean high father's education") scheme(s1mono)
graph save "graphs/diversity/school/hist_group_fatheduc", replace
graph export "graphs/diversity/school/hist_group_fatheduc.pdf", replace

*** Simple Mean fathers education ****
by group_school: egen group_school_higheduc_mean = mean(fathers_high_educ) if `gradyears' & !missing(group_school)

sum group_school_higheduc_mean, detail

sum group_school_higheduc_mean if fathers_high_educ==0, detail
sum group_school_higheduc_mean if fathers_high_educ==1, detail

* Axis title: stray trailing apostrophe removed
hist group_school_higheduc_mean if `gradyears' & smallgroup==0, frac lcolor(black) lpattern("l") xtitle("Mean high father's education") scheme(s1mono)
graph save "graphs/diversity/school/hist_group_fatheduc_mean", replace
graph export "graphs/diversity/school/hist_group_fatheduc_mean.pdf", replace

*******************************************************************************
* Collapse father's education level into four categories:
*   1 = level below 3, 2 = level 3, 3 = level 4 or 5, 4 = level above 5.
* Missing education stays missing; so does any level matching none of the rules.
gen fathers_educ_quart = cond(fathers_educ<3, 1, cond(fathers_educ==3, 2, cond(inlist(fathers_educ, 4, 5), 3, cond(fathers_educ>5, 4, .)))) if !missing(fathers_educ)

local gradyears "avgar>2000 & avgar<2006"

hist fathers_educ_quart if `gradyears'

* One-way table plus indicator variables fathers_educ_quart_1, _2, ...
tab fathers_educ_quart if `gradyears', gen(fathers_educ_quart_)

********************************************************************************
*** Father's income ***
*** quart_earn_age3645_men = income quartile (from a separately created data
*** set that was merged onto EOC_diversity.dta)
*** The income data set contains incomes of all men in Sweden
*** They are then placed in income quartiles within their birth cohort
*** Source is the income tax register from SCB
********************************************************************************
* Move the quartile variables already in memory to an _old suffix. The names
* without suffix are used again after the merge below, so they are presumably
* supplied by the using file - verify against income_ranking_men_b.dta.
rename quart_earn_age3645_men quart_earn_age3645_men_old
forvalues q = 1/4 {
    rename quart_earn_age3645_men_`q' quart_earn_age3645_men_`q'_old
}

********************************************************************************
*** Merge new income ranking ***
********************************************************************************
cd "/home/almdan/EoC/build/"

* The merge key is LopNr: temporarily give the father's id that name and park
* the student's own id.
rename (lopnr LopNr_Far) (LopNr_temp LopNr)

merge m:1 LopNr using "normdata/income_ranking_men_b.dta", generate(merge_income_ranking_men_b)

* Compare the new merge result with the merge indicator already in the data
tab merge_income_ranking_men_b merge_income_ranking_men

* Drop rows that exist only in the income-ranking file
drop if merge_income_ranking_men_b==2

* Restore the original id names
rename (LopNr LopNr_temp) (LopNr_Far lopnr)

* Non-missing counts of the new versus the old quartile variable
count if quart_earn_age3645_men !=.
count if quart_earn_age3645_men_old !=.

cd "/home/almdan/EoC/analysis/revising_citizenship/POBE_final/"

********************************************************************************
* Share of peers whose father is in the upper half of the income ranking
* (income quartile 3 or 4), by school group, graduation years 2001-2005.
local gradyears "avgar>2000 & avgar<2006"

sort group_school

* 1 if father's income quartile is above 2; missing when the quartile is missing
gen high_inc_father=(quart_earn_age3645_men>2) if `gradyears' & !missing(quart_earn_age3645_men)

* Leave-one-out share: group total minus own value over (group size - 1)
by group_school: egen group_school_highinc_fath = total(high_inc_father) if `gradyears' & !missing(group_school)
replace group_school_highinc_fath  = (group_school_highinc_fath  - high_inc_father)/(group_school_size  - 1) if `gradyears'
* NOTE(review): total() counts missing high_inc_father as 0 while the
* denominator is the full group size - confirm this is intended.

sum group_school_highinc_fath , detail

sum group_school_highinc_fath  if high_inc_father==0, detail
sum group_school_highinc_fath  if high_inc_father==1, detail

hist group_school_highinc_fath if `gradyears' & smallgroup==0, frac lcolor(black) lpattern("l") xtitle("Leave out mean high father's income") scheme(s1mono)
graph save "graphs/diversity/school/hist_group_fatherinc", replace
graph export "graphs/diversity/school/hist_group_fatherinc.pdf", replace

*** Simple mean father's income ***

* The !missing(group_school) guard is required here: by: treats a missing
* group id as a group of its own, so without the guard every graduate lacking
* a group id would receive the pooled mean of all such graduates.
by group_school: egen group_school_highinc_fath_mean = mean(high_inc_father) if `gradyears' & !missing(group_school)

sum group_school_highinc_fath_mean , detail

sum group_school_highinc_fath_mean  if high_inc_father==0, detail
sum group_school_highinc_fath_mean  if high_inc_father==1, detail

hist group_school_highinc_fath_mean if `gradyears' & smallgroup==0, frac lcolor(black) lpattern("l") xtitle("Mean high father's income") scheme(s1mono)
graph save "graphs/diversity/school/hist_group_fatherinc_mean", replace
graph export "graphs/diversity/school/hist_group_fatherinc_mean.pdf", replace

*** Father's income quartiles ***
* For each income quartile q = 1..4: leave-one-out share and simple mean of
* the quartile indicator quart_earn_age3645_men_q within each school group.
* Requires the data to be sorted by group_school.
local gradyears "avgar>2000 & avgar<2006"

forvalues q=1(1)4 {
    * Leave-one-out share: group total minus own value over (group size - 1)
    by group_school: egen group_school_fath_inc_q_`q' = total(quart_earn_age3645_men_`q') if `gradyears' & !missing(group_school)
    replace group_school_fath_inc_q_`q' = (group_school_fath_inc_q_`q' -quart_earn_age3645_men_`q')/(group_school_size  - 1) if `gradyears'

    sum group_school_fath_inc_q_`q' , detail

    hist group_school_fath_inc_q_`q' if `gradyears' & smallgroup==0, frac lcolor(black) lpattern("l") xtitle("Leave out mean father's income quartile `q'") scheme(s1mono)
    graph save "graphs/diversity/school/hist_group_fatherinc_quart_`q'", replace
    graph export "graphs/diversity/school/hist_group_fatherinc_quart_`q'.pdf", replace

    *** Simple mean father's income quartile ***
    * The !missing(group_school) guard keeps graduates without a group id from
    * being pooled into one by-group and receiving a non-missing mean.
    by group_school: egen group_school_fath_inc_q_`q'_mean = mean(quart_earn_age3645_men_`q') if `gradyears' & !missing(group_school)

    sum group_school_fath_inc_q_`q'_mean , detail

    hist group_school_fath_inc_q_`q'_mean if `gradyears' & smallgroup==0, frac lcolor(black) lpattern("l") xtitle("Mean father's income quartile `q'") scheme(s1mono)
    graph save "graphs/diversity/school/hist_group_fatherinc_quart_`q'_mean", replace
    graph export "graphs/diversity/school/hist_group_fatherinc_quart_`q'_mean.pdf", replace
}

*** Share in each father's income quartile in high school ***
* Bar chart over the four income quartiles for graduation years 2001-2005.
* No y-variable is given, so graph bar uses its default statistic
* (the y axis is titled "Percent").
local gradyears "avgar>2000 & avgar<2006"

graph bar if `gradyears', ///
    over(quart_earn_age3645_men, relabel(1 "Quartile 1" 2 "Quartile 2" 3 "Quartile 3" 4 "Quartile 4")) ///
    bargap(1) ytitle("Percent") ytick(0(5)40) ylabel(0(10)40) title("High School") ///
    saving("graphs/diversity/school/descriptives_inc_school04", replace) scheme(s1mono)

* Export the same graph as pdf and tif
foreach ext in pdf tif {
    graph export "graphs/diversity/school/descriptives_inc_school04.`ext'", replace
}

* The same shares as a one-way table
tab quart_earn_age3645_men if `gradyears'
********************************************************************************
*** Summary statistics for schools ***
********************************************************************************
* Four sets of detailed summary statistics are stored (est1-est4):
*   est1 all students, groups with smallgroup==0    est2 all students
*   est3 one obs per group, smallgroup==0           est4 one obs per group
local gradyears "avgar>2000 & avgar<2006"

*** Tag one obs in each group to get an unweighted measure **
egen group_school_tag=tag(group_school) if `gradyears'

distinct group_school

set more off
eststo clear

* Variables summarized in every column
local sumvars group_school_size group_school_foreign group_school_foreign_mean group_school_highinc_fath group_school_highinc_fath_mean group_school_fath_inc_q_1 group_school_fath_inc_q_2 group_school_fath_inc_q_3 group_school_fath_inc_q_4 group_school_fath_inc_q_1_mean group_school_fath_inc_q_2_mean group_school_fath_inc_q_3_mean group_school_fath_inc_q_4_mean group_school_higheduc

eststo: estpost sum `sumvars' if `gradyears' & smallgroup==0, det

eststo: estpost sum `sumvars' if `gradyears', det

eststo: estpost sum `sumvars' if `gradyears' & smallgroup==0 & group_school_tag==1, det

eststo: estpost sum `sumvars' if `gradyears' & group_school_tag==1, det

* Table layout shared by all exported tables
local layout cell("count mean (fmt(2)) p50 (fmt(2)) sd (fmt(2)) min max") varwidth(17) modelwidth(5, 7, 5) noobs nonumber nodepvar

* Student-level tables (.tex then .rtf)
foreach ext in tex rtf {
    esttab est1 est2 using "output/diversity/school/sumstats_diversity_school.`ext'", `layout' mtitles("All obs., group size>9" "All obs") replace label
}

* One-observation-per-group tables (.tex then .rtf)
foreach ext in tex rtf {
    esttab est3 est4 using "output/diversity/school/sumstats_diversity_school_unwe.`ext'", `layout' mtitles("One obs. per school, group size>9" "One obs. per school") replace label
}


* Variables to carry forward. The two a-b ranges depend on the current
* variable order in memory.
local keepvars group_school-group_school_tag avgar-merge_avggymn9720 lopnr foreign  quart_earn_age3645_men quart_earn_age3645_men_old
keep `keepvars'

cd "/home/almdan/EoC/analysis/revising_citizenship/temp_data_schooldiversity/"

*** Save the school data; it is merged with the military data in the next
*** part ***
save "EOC_diversity_school_2001_2005_b", replace

********************************************************************************
********************************************************************************
*** Part B: Load the military final data ***
********************************************************************************
********************************************************************************

* --- Session setup for Part B -------------------------------------------------
clear all

cd "/home/almdan/EoC/analysis/revising_citizenship/"

* Raise the variable limit before loading the data
set maxvar 15000

use "data/finaldata_revising_citizenship", clear

set more off

cd "/home/almdan/EoC/analysis/revising_citizenship/POBE_final/"

* Close any log still open (e.g. the Part A log) and start the Part B log
cap log close

log using "log/POBE_final_appendix_F", text replace
********************************************************************************
* Variables needed below. The range lopnr-muskkap depends on the variable
* order of the loaded file.
local keepvars lopnr-muskkap lopnr* fath_educ_level moth_educ_level utan inrar btkdpos1 btkdpos3 insk_date_sif civserv_inskr foreign utan_coherent
keep `keepvars'

set scheme s1mono

********************************************************************************
*** This code below explores composition in high school vs military units ***
********************************************************************************

*** KEY VARIABLES ***
*** fath_educ_level = fathers education level (from SCB, LISA register)
*** utan_coherent = time consistent code for military unit (from the NSA data)
*** inrar = planned entry year in the military (from the NSA data)
*** insk_date_sif = date conscripted (from the NSA data)
*** foreign = foreign background (definition by SCB, LISA register)
*** btkdpos1 = first entry in the service occupation code (btkd) indicating line of service (from the NSA data)
*** btkdpos3 = third entry in the service occupation code (btkd) indicating level of service (from the NSA data)
*** civserv_inskr = conscripted into civil service  (first position in btkd = Q)

********************************************************************************
*** Exploring group sizes in the military ***
********************************************************************************
* Sample: planned entry year after 2001 (and not missing), conscripted before
* 23 Sep 2004, not conscripted into civil service.
local milsample "(inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0"

* A military group is one unit x entry year x btkdpos1 x btkdpos3 cell
sort utan_coherent inrar btkdpos1 btkdpos3
egen group_yorc = group(utan_coherent inrar btkdpos1 btkdpos3) if `milsample'

* Number of conscripts per group; missing where no group id was assigned
sort group_yorc
by group_yorc: gen group_yorc_size = _N if `milsample' & group_yorc!=.

hist group_yorc_size if `milsample' & group_yorc!=., frac lcolor(black) lpattern("l") xtitle("Size of Groups") scheme(s1mono) blw(*.4) title(Selective Conscription)  bcolor(gs11) blcolor(black)
graph save "graphs/diversity/hist_group_yorc_size", replace
graph export "graphs/diversity/hist_group_yorc_size.pdf", replace
graph export "graphs/diversity/hist_group_yorc_size.tif", replace

sum group_yorc_size, det

* Small-group indicators (fewer than 10 / fewer than 6 members).
* A missing size compares as larger than any number, so without the
* !missing() condition observations with no group would be coded 0 and pass
* every "smallgroup...==0" filter. They are left missing instead.
gen smallgroup_yorc=(group_yorc_size<10) if !missing(group_yorc_size)

gen smallgroup6_yorc=(group_yorc_size<6) if !missing(group_yorc_size)

********************************************************************************
********************************************************************************
*** SES by father's income level ***
********************************************************************************
********************************************************************************

*******************************************************************************
*** Merge Income ranking on Father***
*******************************************************************************
*** The file contains incomes of all men in Sweden
*** They are then placed in income quartiles within their birth cohort
*** Sources are LISA and income tax registers from SCB
*******************************************************************************
cd "/home/almdan/EoC/build/"

* The merge key is LopNr: temporarily give the father's id that name and park
* the conscript's own id.
rename (lopnr lopnr_far) (LopNr_temp LopNr)

merge m:1 LopNr using "normdata/income_ranking_men_b.dta", generate(merge_income_ranking_men)

* Merge result within the military sample
local milsample "(inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0"
tab merge_income_ranking_men if `milsample'

* Drop rows that exist only in the income-ranking file
drop if merge_income_ranking_men==2

* Restore the original id names
rename (LopNr LopNr_temp) (lopnr_far lopnr)

cd "/home/almdan/EoC/analysis/revising_citizenship/POBE_final/"
********************************************************************************
**** Fathers income ****
********************************************************************************
* Share of peers with a high-income father within each military group.
local milsample "(inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0"

*** Creating variable indicating father income above median => high father's income ***
* (coded as income quartile above 2; missing when the quartile is missing)

sort group_yorc
gen high_inc_father_04=(quart_earn_age3645_men>2) if `milsample' & !missing(quart_earn_age3645_men)

*** Creating leave i out share of high father's income in each group ***
* Group total minus own value over (group size - 1)
by group_yorc: egen group_yorc_highinc_fath_04 = total(high_inc_father_04) if `milsample' & !missing(group_yorc)
replace group_yorc_highinc_fath_04  = (group_yorc_highinc_fath_04  - high_inc_father_04)/(group_yorc_size  - 1) if `milsample'
* NOTE(review): total() counts missing high_inc_father_04 as 0 while the
* denominator is the full group size - confirm this is intended.

sum group_yorc_highinc_fath_04 , detail

sum group_yorc_highinc_fath_04  if high_inc_father_04==0, detail
sum group_yorc_highinc_fath_04  if high_inc_father_04==1, detail

hist group_yorc_highinc_fath_04 if `milsample' & smallgroup_yorc==0, frac lcolor(black) lpattern("l") xtitle("Leave out share high father's income") scheme(s1mono) blw(*.4) xtick(0(0.05)1) xlabel(0(0.1)1)
graph save "graphs/diversity/hist_group_fatherinc_04", replace
graph export "graphs/diversity/hist_group_fatherinc_04.pdf", replace
graph export "graphs/diversity/hist_group_fatherinc_04.tif", replace

*** Simple mean father's income ***

* The !missing(group_yorc) guard is required here: by: treats a missing group
* id as a group of its own, so without the guard conscripts lacking a group id
* would receive the pooled mean of all such conscripts.
by group_yorc: egen group_yorc_highinc_fath_04_mean = mean(high_inc_father_04) if `milsample' & !missing(group_yorc)

sum group_yorc_highinc_fath_04_mean , detail

sum group_yorc_highinc_fath_04_mean  if high_inc_father_04==0, detail
sum group_yorc_highinc_fath_04_mean  if high_inc_father_04==1, detail

hist group_yorc_highinc_fath_04_mean if `milsample' & smallgroup_yorc==0, frac lcolor(black) lpattern("l") xtitle("Share high father's income") scheme(s1mono) blw(*.4) xtick(0(0.05)1) xlabel(0(0.1)1)
graph save "graphs/diversity/hist_group_fatherinc_04_mean", replace
graph export "graphs/diversity/hist_group_fatherinc_04_mean.pdf", replace
graph export "graphs/diversity/hist_group_fatherinc_04_mean.tif", replace

********** Father's income quartiles *******
* For each income quartile q = 1..4: leave-one-out share and simple mean of
* the quartile indicator quart_earn_age3645_men_q within each military group.
* Requires the data to be sorted by group_yorc.
local milsample "(inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0"

forvalues q=1(1)4 {
    * Leave-one-out share: group total minus own value over (group size - 1)
    by group_yorc: egen group_yorc_fath_inc_04_q_`q' = total(quart_earn_age3645_men_`q') if `milsample' & !missing(group_yorc)
    replace group_yorc_fath_inc_04_q_`q' = (group_yorc_fath_inc_04_q_`q' -quart_earn_age3645_men_`q')/(group_yorc_size  - 1) if `milsample'

    sum group_yorc_fath_inc_04_q_`q' , detail

    hist group_yorc_fath_inc_04_q_`q' if `milsample' & smallgroup_yorc==0, frac lcolor(black) lpattern("l") xtitle("Leave out share father's income quartile `q'") scheme(s1mono) blw(*.4) xtick(0(0.05)1) xlabel(0(0.1)1)
    graph save "graphs/diversity/hist_group_fatherinc_04_quart_`q'", replace
    graph export "graphs/diversity/hist_group_fatherinc_04_quart_`q'.pdf", replace
    graph export "graphs/diversity/hist_group_fatherinc_04_quart_`q'.tif", replace

    *** Simple mean father's income quartile ***
    * The !missing(group_yorc) guard keeps conscripts without a group id from
    * being pooled into one by-group and receiving a non-missing mean.
    by group_yorc: egen group_yorc_fath_inc_04_q_`q'_mean = mean(quart_earn_age3645_men_`q') if `milsample' & !missing(group_yorc)

    sum group_yorc_fath_inc_04_q_`q'_mean , detail

    hist group_yorc_fath_inc_04_q_`q'_mean if `milsample' & smallgroup_yorc==0, frac lcolor(black) lpattern("l") xtitle("Share father's income quartile `q'") scheme(s1mono) blw(*.4) xtick(0(0.05)1) xlabel(0(0.1)1)
    graph save "graphs/diversity/hist_group_fatherinc_04_quart_`q'_mean", replace
    graph export "graphs/diversity/hist_group_fatherinc_04_quart_`q'_mean.pdf", replace
    graph export "graphs/diversity/hist_group_fatherinc_04_quart_`q'_mean.tif", replace
}

*******************************************************************************
*** Sumstats for military groups ***
*******************************************************************************
* Four sets of detailed summary statistics are stored (est1-est4):
*   est1 all conscripts, smallgroup6_yorc==0      est2 all conscripts
*   est3 one obs per group, smallgroup6_yorc==0   est4 one obs per group
local milsample "(inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0"

** Tag one obs in each group to get an unweighted measure **
egen group_yorc_tag=tag(group_yorc) if `milsample'

sum group_yorc_size, detail

distinct group_yorc

set more off
eststo clear

* Variables summarized in every column
local sumvars group_yorc_size group_yorc_highinc_fath_04 group_yorc_highinc_fath_04_mean group_yorc_fath_inc_04_q_1 group_yorc_fath_inc_04_q_2 group_yorc_fath_inc_04_q_3 group_yorc_fath_inc_04_q_4 group_yorc_fath_inc_04_q_1_mean group_yorc_fath_inc_04_q_2_mean group_yorc_fath_inc_04_q_3_mean group_yorc_fath_inc_04_q_4_mean

eststo: estpost sum `sumvars' if `milsample' & smallgroup6_yorc==0 & !missing(group_yorc), det

eststo: estpost sum `sumvars' if `milsample' & !missing(group_yorc), det

eststo: estpost sum `sumvars' if `milsample' & smallgroup6_yorc==0 & group_yorc_tag==1 & !missing(group_yorc), det

eststo: estpost sum `sumvars' if `milsample' & group_yorc_tag==1 & !missing(group_yorc), det

* Table layout shared by all exported tables
local layout cell("count mean (fmt(2)) p50 (fmt(2)) sd (fmt(2)) min max") varwidth(17) modelwidth(5, 7, 5) noobs nonumber nodepvar

* Conscript-level tables (.tex then .rtf)
foreach ext in tex rtf {
    esttab est1 est2 using "output/diversity/sumstats_diversity_yorc.`ext'", `layout' mtitles("All obs., group size>5" "All obs") replace label
}

* One-observation-per-group tables (.tex then .rtf)
foreach ext in tex rtf {
    esttab est3 est4 using "output/diversity/sumstats_diversity_yorc_unwe.`ext'", `layout' mtitles("One obs. per yorc, group size>5" "One obs. per yorc") replace label
}


********************************************************************************
*** Merge school data (the data created in Part A) ***
********************************************************************************
* Adds each conscript's high-school group measures, matched on lopnr.

cd /home/almdan/EoC/analysis/revising_citizenship/temp_data_schooldiversity/

* School-side variables brought in from the Part A file
local schoolvars avgar group_school_size group_school_foreign group_school_foreign_mean group_school_highinc_fath group_school_highinc_fath_mean group_school_fath_inc_q_1 group_school_fath_inc_q_2 group_school_fath_inc_q_3 group_school_fath_inc_q_4 group_school_fath_inc_q_1_mean group_school_fath_inc_q_2_mean group_school_fath_inc_q_3_mean group_school_fath_inc_q_4_mean smallgroup group_school group_school_higheduc group_school_higheduc_mean

* NOTE(review): 1:1 requires lopnr to be unique in both files - confirm for
* the school file.
merge 1:1 lopnr using "EOC_diversity_school_2001_2005_b", keepusing(`schoolvars') generate(merge_school_diversity)

* Drop rows that exist only in the school file
drop if merge_school_diversity==2

sum group_school_size group_yorc_size, det

* School groups with fewer than 6 students.
* A missing size compares as larger than any number, so without the
* !missing() condition observations with no school group would be coded 0 and
* pass every "smallgroup6==0" filter. They are left missing instead.
gen smallgroup6=(group_school_size<6) if !missing(group_school_size)

* Number of distinct school groups in the fully restricted matched sample
distinct group_school if avgar>2000 & avgar<2006 & (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 & smallgroup6==0 & smallgroup6_yorc==0 & !missing(group_school_highinc_fath) & !missing(group_yorc_highinc_fath_04)

********************************************************************************
*** Sumstats for school groups (the matched school sample) ***
********************************************************************************
* Four sets of detailed summary statistics are stored (est1-est4):
*   est1 all obs, smallgroup6==0          est2 all obs
*   est3 one obs per group, smallgroup6==0   est4 one obs per group

cd "/home/almdan/EoC/analysis/revising_citizenship/POBE_final/"

* Sample conditions reused below
local gradyears "avgar>2000 & avgar<2006"
local milsample "(inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0"
local fullrestr "smallgroup6==0 & smallgroup6_yorc==0 & !missing(group_school_highinc_fath) & !missing(group_yorc_highinc_fath_04)"

* Number of distinct school groups under successively tighter restrictions
distinct group_school if `gradyears'
distinct group_school if `gradyears' & smallgroup6==0
distinct group_school if `milsample' & `fullrestr'

*** Tag one obs in each group to get an unweighted measure **
egen group_school_tag=tag(group_school) if `gradyears'
egen group_school_tag_small6=tag(group_school) if `gradyears' & smallgroup6==0
egen group_school_tag_small6_fullr=tag(group_school) if `gradyears' & `milsample' & `fullrestr'

foreach tagvar in group_school_tag group_school_tag_small6 group_school_tag_small6_fullr {
    tab `tagvar'
}

set more off
eststo clear

* Variables summarized in every column
local sumvars group_school_size group_school_foreign group_school_foreign_mean group_school_highinc_fath group_school_highinc_fath_mean group_school_fath_inc_q_1 group_school_fath_inc_q_2 group_school_fath_inc_q_3 group_school_fath_inc_q_4 group_school_fath_inc_q_1_mean group_school_fath_inc_q_2_mean group_school_fath_inc_q_3_mean group_school_fath_inc_q_4_mean group_school_higheduc group_school_higheduc_mean group_school_tag

eststo: estpost sum `sumvars' if `gradyears' & smallgroup6==0, det

eststo: estpost sum `sumvars' if `gradyears', det

eststo: estpost sum `sumvars' if `gradyears' & smallgroup6==0 & group_school_tag_small6==1, det

eststo: estpost sum `sumvars' if `gradyears' & group_school_tag==1, det

* Table layout shared by all exported tables
local layout cell("count mean (fmt(2)) p50 (fmt(2)) sd (fmt(2)) min max") varwidth(17) modelwidth(5, 7, 5) noobs nonumber nodepvar

* Observation-level tables (.tex then .rtf)
foreach ext in tex rtf {
    esttab est1 est2 using "output/diversity/sumstats_diversity_school.`ext'", `layout' mtitles("All obs., group size>5" "All obs") replace label
}

* One-observation-per-group tables (.tex then .rtf)
foreach ext in tex rtf {
    esttab est3 est4 using "output/diversity/sumstats_diversity_school_unwe.`ext'", `layout' mtitles("One obs. per school, group size>5" "One obs. per school") replace label
}


********************************************************************************
*** Share with high fathers income ***
********************************************************************************
cd /home/almdan/EoC/analysis/revising_citizenship/POBE_final/

*** Compare the share of peers with high-income fathers in the military group
*** (group_yorc_highinc_fath_04) with the share in the school group
*** (group_school_highinc_fath). Both new variables are left missing whenever
*** either share is missing.

*** high_inc_more: 1 if the military share is strictly larger than the school share, else 0 ***
gen high_inc_more=(group_yorc_highinc_fath_04>group_school_highinc_fath) if !missing(group_yorc_highinc_fath_04) & !missing(group_school_highinc_fath)

*** high_inc_diff: military share minus school share ***
gen high_inc_diff=(group_yorc_highinc_fath_04-group_school_highinc_fath) if !missing(group_yorc_highinc_fath_04) & !missing(group_school_highinc_fath)


label variable group_yorc_highinc_fath_04 "Share peers with high father's income, military"

label variable group_school_highinc_fath "Share peers with high father's income, school"

*** Label previously ended in an unmatched ")" that was printed in the exported tables ***
label variable high_inc_diff "Difference in high income fathers (military-school)"

label variable high_inc_more "Share with Difference > 0"
*** Sample restriction shared by every histogram in this section ***
local apxF_cohort (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0

*** Military: distribution of the share of peers with high-income fathers ***
histogram group_yorc_highinc_fath_04 if `apxF_cohort' & smallgroup6_yorc==0, ///
    frac lcolor(black) title("Selective Conscription") xtitle("Share of high SES peers") ///
    scheme(s1mono) xtick(0(0.05)1) xlabel(0(0.1)1) ytick(0(0.01)0.07) ylabel(0(0.02)0.07) ///
    width(0.01)  blw(*.4) bcolor(gs11) blcolor(black)
graph save "graphs/diversity/share_highinc_military04", replace
foreach apxF_fmt in pdf tif {
    graph export "graphs/diversity/share_highinc_military04.`apxF_fmt'", replace
}

*** School: same histogram for the school-group share ***
histogram group_school_highinc_fath if `apxF_cohort' & smallgroup6==0, ///
    frac lcolor(black) title("High School") xtitle("Share of high SES peers") ///
    scheme(s1mono) xtick(0(0.05)1) xlabel(0(0.1)1) ytick(0(0.01)0.07) ylabel(0(0.02)0.07) ///
    width(0.01)  blw(*.4) bcolor(gs11) blcolor(black)
graph save "graphs/diversity/share_highinc_school04", replace
foreach apxF_fmt in pdf tif {
    graph export "graphs/diversity/share_highinc_school04.`apxF_fmt'", replace
}

*** Difference (military minus school), with a dashed vertical reference line at zero ***
*** NOTE(review): these plots filter on smallgroup6==0 only, whereas the summary
*** tables for high_inc_diff also require smallgroup6_yorc==0 - confirm this is intended.

*** Pooled sample ***
histogram high_inc_diff if `apxF_cohort' & smallgroup6==0, ///
    frac lcolor(black) xtitle("Difference") title("Selective Conscription") ///
    scheme(s1mono) xtick(-1(0.1)1) xlabel(-1(0.2)1) ytick(0(0.01)0.07) ylabel(0(0.02)0.07) ///
    width(0.025) blw(*.4)  bcolor(gs11) blcolor(black) ///
    addplot(pci 0 0 .07 0, lcolor(black) lwidth(medthick) lpattern(dash)) legend(off)
graph save "graphs/diversity/share_highinc_difference04", replace
graph export "graphs/diversity/share_highinc_difference04.pdf", replace

*** Split by own father's income indicator: high_inc_father_04==0, then ==1 ***
foreach apxF_ses in 0 1 {
    histogram high_inc_diff if `apxF_cohort' & smallgroup6==0 & high_inc_father_04==`apxF_ses', ///
        frac lcolor(black) xtitle("Difference") title("Selective Conscription") ///
        scheme(s1mono) xtick(-1(0.1)1) xlabel(-1(0.2)1) ytick(0(0.01)0.07) ylabel(0(0.02)0.07) ///
        width(0.025) blw(*.4)  bcolor(gs11) blcolor(black) ///
        addplot(pci 0 0 .07 0, lcolor(black) lwidth(medthick) lpattern(dash)) legend(off)
    graph save "graphs/diversity/share_highinc`apxF_ses'_difference04", replace
    graph export "graphs/diversity/share_highinc`apxF_ses'_difference04.pdf", replace
}

********************************************************************************
*** Figure F.1: Fathers' income quartiles ***
********************************************************************************

*** Bar chart over quart_earn_age3645_men for the military sample ***
*** NOTE(review): saving() writes the .gph to "graphs/" while the exports go to
*** "graphs/diversity/" - confirm the different folder is intended.
local apxF_cohort (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0

graph bar if `apxF_cohort', ///
    over(quart_earn_age3645_men, relabel(1 "Quartile 1" 2 "Quartile 2" 3 "Quartile 3" 4 "Quartile 4")) ///
    bargap(1) ytitle("Percent") ytick(0(5)40) ylabel(0(10)40) ///
    title("Selective Conscription") saving("graphs/descriptives_inc_04", replace)
foreach apxF_fmt in pdf tif {
    graph export "graphs/diversity/descriptives_inc_04.`apxF_fmt'", replace
}

*** Matched sample, the unmatched school sample is computed above ***
*** Same chart for graduation years 2001-2005 ***
local apxF_gradyrs avgar>2000 & avgar<2006

graph bar if `apxF_gradyrs', ///
    over(quart_earn_age3645_men, relabel(1 "Quartile 1" 2 "Quartile 2" 3 "Quartile 3" 4 "Quartile 4")) ///
    bargap(1) ytitle("Percent") ytick(0(5)40) ylabel(0(10)40) ///
    title("High School")  saving("graphs/diversity/descriptives_inc_school04", replace)
foreach apxF_fmt in pdf tif {
    graph export "graphs/diversity/descriptives_inc_school04.`apxF_fmt'", replace
}

*** Frequencies behind the school chart ***
tabulate quart_earn_age3645_men if `apxF_gradyrs'

********************************************************************************
*** Summary Statistics ***
********************************************************************************
set more off
eststo clear

*** Rows with both group shares non-missing and neither group flagged as small ***
local apxF_matched (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 & smallgroup6==0 & smallgroup6_yorc==0 & !missing(group_school_highinc_fath) & !missing(group_yorc_highinc_fath_04)

*** est1: one model whose rows are the school share, military share,
*** their difference and the indicator for a positive difference
eststo: estpost sum group_school_highinc_fath group_yorc_highinc_fath_04  high_inc_diff high_inc_more if `apxF_matched', detail

*** Only one model is tabulated, so only one column title applies. The .tex
*** call used to pass four titles, which headed the single column "School";
*** both formats now use the same title.
foreach apxF_ext in rtf tex {
    esttab est1 using "output/diversity/sumstats_compare_highinc.`apxF_ext'", ///
        cell("count mean (fmt(2)) p50 (fmt(2)) sd (fmt(2)) min max") ///
        mtitles("Selective Conscription") ///
        varwidth(17) modelwidth(5, 7, 5) noobs nonumber nodepvar replace label
}

**** For those with low income fathers ***
*** low_inc_father_04 is the complement of high_inc_father_04 (missing stays missing) ***
gen low_inc_father_04=1 if high_inc_father_04==0
replace low_inc_father_04=0 if high_inc_father_04==1

set more off
eststo clear

*** Rows with both group shares non-missing and neither group flagged as small ***
local apxF_matched (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 & smallgroup6==0 & smallgroup6_yorc==0 & !missing(group_school_highinc_fath) & !missing(group_yorc_highinc_fath_04)

*** est1: the four comparison variables, restricted to high_inc_father_04==0 ***
eststo: estpost sum group_school_highinc_fath group_yorc_highinc_fath_04  high_inc_diff high_inc_more if `apxF_matched' & high_inc_father_04==0 , detail

*** Only one model is tabulated, so only one column title applies. The .tex
*** call used to pass four titles, which headed the single column "School";
*** both formats now use the same title.
foreach apxF_ext in rtf tex {
    esttab est1 using "output/diversity/sumstats_compare_highinc_0.`apxF_ext'", ///
        cell("count mean (fmt(2)) p50 (fmt(2)) sd (fmt(2)) min max") ///
        mtitles("Selective Conscription") ///
        varwidth(17) modelwidth(5, 7, 5) noobs nonumber nodepvar replace label
}

**** For those with high income fathers ***

set more off
eststo clear

*** Rows with both group shares non-missing, neither group flagged as small,
*** restricted to high_inc_father_04==1
local apxF_matched1 (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 & smallgroup6==0 & smallgroup6_yorc==0 & !missing(group_school_highinc_fath) & !missing(group_yorc_highinc_fath_04) & high_inc_father_04==1

*** est1: all four comparison variables in a single model (long table) ***
eststo: estpost sum group_school_highinc_fath group_yorc_highinc_fath_04  high_inc_diff high_inc_more if `apxF_matched1' , detail

*** est2-est5: one model per variable, in this order, for the wide table ***
foreach apxF_v in group_school_highinc_fath group_yorc_highinc_fath_04 high_inc_diff high_inc_more {
    eststo: estpost sum `apxF_v' if `apxF_matched1', det
}


*** Long table: one model, so only one column title applies. The .tex call
*** used to pass four titles, which headed the single column "School";
*** both formats now use the same title.
foreach apxF_ext in rtf tex {
    esttab est1 using "output/diversity/sumstats_compare_highinc_1.`apxF_ext'", ///
        cell("count mean (fmt(2)) p50 (fmt(2)) sd (fmt(2)) min max") ///
        mtitles("Selective Conscription") ///
        varwidth(17) modelwidth(5, 7, 5) noobs nonumber nodepvar replace label
}

*** Wide table: four models, four column titles ***
foreach apxF_ext in rtf tex {
    esttab est2 est3 est4 est5 using "output/diversity/sumstats_compare_highinc_wide_1.`apxF_ext'", ///
        cell("count mean (fmt(2)) p50 (fmt(2)) sd (fmt(2)) min max") ///
        mtitles("School" "Military" "Difference" "Share with difference > 0") ///
        varwidth(17) modelwidth(5, 7, 5) noobs nonumber nodepvar replace label
}

*********************************************************************************
*** Table F.1: Regressions on sorting ***
*********************************************************************************

set more off
eststo clear

*** Estimation sample: both group shares non-missing, neither group flagged as small ***
local apxF_matched (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 & smallgroup6==0 & smallgroup6_yorc==0 & !missing(group_school_highinc_fath) & !missing(group_yorc_highinc_fath_04)

*** Military-group share regressed on (1) the school-group share, (2) own
*** father's income indicator, (3) both. Standard errors are clustered on
*** group_school and the mean of the dependent variable is added to each model.
foreach apxF_rhs in "group_school_highinc_fath" "high_inc_father_04" "group_school_highinc_fath high_inc_father_04" {
    eststo: regress group_yorc_highinc_fath_04 `apxF_rhs' if `apxF_matched', cluster(group_school)
    estadd ysumm, mean
}


*** Export all stored models as .rtf and then .tex ***
foreach apxF_ext in rtf tex {
    esttab est* using "output/diversity/reg_share_high_inc_predict.`apxF_ext'", ///
        varwidth(17) modelwidth(5, 7, 5) ///
        scalars("ymean Mean" "r2_a Adj. R-squared" "r2 R-squared") ///
        se replace label starlevels(* 0.1 ** 0.05 *** 0.01)
}

********************************************************************************
*** Checking likelihood of large moves along the SES distribution ***
*** From a quartile in school to another quartile in the military ***
********************************************************************************

*** Sample: both group shares non-missing, neither group flagged as small ***
local apxF_matched (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 & smallgroup6==0 & smallgroup6_yorc==0 & !missing(group_school_highinc_fath) & !missing(group_yorc_highinc_fath_04)

*** Detailed distribution of both shares: pooled, then by own father's income
*** indicator (0 = low SES, 1 = high SES)
summarize group_yorc_highinc_fath_04 group_school_highinc_fath if `apxF_matched', detail
foreach apxF_ses in 0 1 {
    summarize group_yorc_highinc_fath_04 group_school_highinc_fath if `apxF_matched' & high_inc_father_04==`apxF_ses' , detail
}


*** Creating indicator: Move from bottom to top in general ***
*** NOTE(review): the numeric cut-offs below are hard-coded; presumably they are
*** quartile bounds copied from summarize output - confirm after any data change.
generate move_1to4_inc = (group_yorc_highinc_fath_04>0.6470488 & group_school_highinc_fath<0.4611614) ///
    if !missing(group_yorc_highinc_fath_04) & !missing(group_school_highinc_fath) & smallgroup6==0 & smallgroup6_yorc==0

*** Creating indicator: Move from top to bottom in general ***
generate move_4to1_inc = (group_yorc_highinc_fath_04<0.5092  & group_school_highinc_fath>0.6535433) ///
    if !missing(group_yorc_highinc_fath_04) & !missing(group_school_highinc_fath) & smallgroup6==0 & smallgroup6_yorc==0

***
*** Quartile membership (xtile) and the quartile cut points (pctile) of each
*** share, computed on the matched sample

xtile SES_military_quartile = group_yorc_highinc_fath_04 if `apxF_matched', nquantile(4)

pctile SES_military_quartile_value = group_yorc_highinc_fath_04 if `apxF_matched', nquantile(4)

xtile SES_school_quartile = group_school_highinc_fath if `apxF_matched', nquantile(4)

pctile SES_school_quartile_value = group_school_highinc_fath if `apxF_matched', nquantile(4)

********************************************************************************
*** Analysis of flows from school quartile to military quartiles ***

*** School quartile (rows) by military quartile (columns), with row and cell percentages ***
tabulate SES_school_quartile SES_military_quartile , row cell

*** Same cross-tabulation by own father's income indicator (0, then 1) ***
foreach apxF_ses in 0 1 {
    tabulate SES_school_quartile SES_military_quartile if high_inc_father_04==`apxF_ses', row cell
}


*** Each quartile variable against own father's income indicator ***
foreach apxF_q in SES_school_quartile SES_military_quartile {
    tabulate `apxF_q' high_inc_father_04, row cell
}

*** Summary of the variable foreign in the matched sample ***
summarize foreign if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 ///
    & smallgroup6==0 & smallgroup6_yorc==0 ///
    & !missing(group_school_highinc_fath) & !missing(group_yorc_highinc_fath_04)

*** An alternative way ***
********************************************************************************
*** Sample: both group shares non-missing, neither group flagged as small ***
local apxF_matched (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 & smallgroup6==0 & smallgroup6_yorc==0 & !missing(group_school_highinc_fath) & !missing(group_yorc_highinc_fath_04)

*** Analysis: Move from bottom to top in general ***
summarize move_1to4_inc move_4to1_inc if `apxF_matched'

********************************************************************************
*** Conditioning on being in the lowest quartile in school ***
summarize move_1to4_inc if group_school_highinc_fath<0.4611614 & `apxF_matched'

********************************************************************************
*** Same condition, separately for low SES (high_inc_father_04==0) and then
*** high SES (high_inc_father_04==1)
foreach apxF_ses in 0 1 {
    summarize move_1to4_inc if group_school_highinc_fath<0.4611614 & high_inc_father_04==`apxF_ses' & `apxF_matched'
}


********************************************************************************
********************************************************************************
********************************************************************************
*** Analysis: Move from top to bottom in general ***
summarize move_1to4_inc move_4to1_inc if `apxF_matched'

********************************************************************************
*** Conditioning on being in the top quartile in school ***
summarize move_4to1_inc if group_school_highinc_fath>0.6535433 & `apxF_matched'


********************************************************************************
*** Same condition, separately for low SES (high_inc_father_04==0) and then
*** high SES (high_inc_father_04==1)
foreach apxF_ses in 0 1 {
    summarize move_4to1_inc if group_school_highinc_fath>0.6535433 & high_inc_father_04==`apxF_ses' & `apxF_matched'
}


********************************************************************************
*** Hypothetical example: if no positive selection and each group had the
*** same share of high SES peers in each group as in the school in general (0.5604)
*******************************************************************************

*** Sample: both group shares non-missing, neither group flagged as small ***
local apxF_matched (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 & smallgroup6==0 & smallgroup6_yorc==0 & !missing(group_school_highinc_fath) & !missing(group_yorc_highinc_fath_04)

*** Constant hypothetical military share (0.5604) for every row in the sample ***
generate share_high_inc_peers_eq56 = 0.5604 if `apxF_matched'

*** 1 if the hypothetical share exceeds the observed school share, else 0 ***
generate high_inc_more_nosorting_adj56 = (share_high_inc_peers_eq56>group_school_highinc_fath) if `apxF_matched'

*** Hypothetical share minus observed school share ***
generate high_inc_diff_nosorting_adj56 = (share_high_inc_peers_eq56-group_school_highinc_fath) if `apxF_matched'


***

summarize high_inc_more_nosorting_adj56 if `apxF_matched', detail

summarize high_inc_diff_nosorting_adj56 if `apxF_matched'


*** Difference hypothetical example ***
*** Sample restriction used by both figures in this section ***
local apxF_plotsamp (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 & smallgroup6==0

*** Histogram of the hypothetical difference, dashed vertical line at zero ***
histogram high_inc_diff_nosorting_adj56 if `apxF_plotsamp', ///
    frac lcolor(black) xtitle("Difference") scheme(s1mono) ///
    xtick(-1(0.1)1) xlabel(-1(0.2)1) ytick(0(0.01)0.08) ylabel(0(0.02)0.08) ///
    width(0.025) blw(*.4)  bcolor(gs11) blcolor(black)  ///
    addplot(pci 0 0 0.08 0, lcolor(black) lwidth(medthick) lpattern(dash)) legend(off)
graph save "graphs/diversity/share_highinc_nosort_difference04", replace
graph export "graphs/diversity/share_highinc_nosort_difference04.pdf", replace

**** Twoway graph ***
*** Observed difference (gs11 bars) overlaid with the hypothetical difference
*** (gs15 bars) and the same dashed line at zero

twoway hist high_inc_diff if `apxF_plotsamp', ///
        frac blcolor(black) legend(off) width(0.025) blw(*.4)  bcolor(gs11) blcolor(black) ///
    ||  hist high_inc_diff_nosorting_adj56 if `apxF_plotsamp', ///
        frac blcolor(black) xtitle("Difference") scheme(s1mono) ///
        xtick(-1(0.1)1) xlabel(-1(0.2)1) ytick(0(0.01)0.08) ylabel(0(0.02)0.08) ///
        width(0.025) legend(off) bcolor(gs15) blw(*.4) ///
    || pci 0 0 0.08 0, lcolor(black) lwidth(medthick) lpattern(dash) legend(off)

graph save "graphs/diversity/share_highinc_nosort_difference04_twoway", replace
graph export "graphs/diversity/share_highinc_nosort_difference04_twoway.pdf", replace


*** Table ***
set more off
eststo clear

*** Sample: both group shares non-missing, neither group flagged as small ***
local apxF_matched (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 & smallgroup6==0 & smallgroup6_yorc==0 & !missing(group_school_highinc_fath) & !missing(group_yorc_highinc_fath_04)

*** est1: the two hypothetical (no-sorting) variables in a single model ***
eststo: estpost sum high_inc_more_nosorting_adj56 high_inc_diff_nosorting_adj56 if `apxF_matched', det

*** Only one model is tabulated, so only one column title applies. The .tex
*** call used to pass four titles, which headed the single column "School";
*** both formats now use the same title.
foreach apxF_ext in rtf tex {
    esttab est1 using "output/diversity/sumstats_compare_highinc_nosort.`apxF_ext'", ///
        cell("count mean (fmt(2)) p50 (fmt(2)) sd (fmt(2)) min max") ///
        mtitles("Selective Conscription") ///
        varwidth(17) modelwidth(5, 7, 5) noobs nonumber nodepvar replace label
}


********************************************************************************
*** for low fathers income ***
*** Everything below is restricted to high_inc_father_04==0 ***
local apxF_matched0 (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 & smallgroup6==0 & smallgroup6_yorc==0 & !missing(group_school_highinc_fath) & !missing(group_yorc_highinc_fath_04) & high_inc_father_04==0
local apxF_plotsamp0 (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 & smallgroup6==0 & high_inc_father_04==0

summarize high_inc_more_nosorting_adj56  if `apxF_matched0'

summarize high_inc_diff_nosorting_adj56  if `apxF_matched0'

*** Difference ***
*** Histogram of the hypothetical difference, dashed vertical line at zero ***

histogram high_inc_diff_nosorting_adj56 if `apxF_plotsamp0', ///
    frac lcolor(black) title("Selective Conscription") xtitle("Difference") scheme(s1mono) ///
    xtick(-1(0.1)1) xlabel(-1(0.2)1) ytick(0(0.01)0.08) ylabel(0(0.02)0.08) ///
    width(0.025) blw(*.4)  bcolor(gs11) blcolor(black)  ///
    addplot(pci 0 0 0.08 0, lcolor(black) lwidth(medthick) lpattern(dash)) legend(off)
graph save "graphs/diversity/share_highinc_nosort0_difference04", replace
graph export "graphs/diversity/share_highinc_nosort0_difference04.pdf", replace

**** Twoway graph ***
*** Observed difference (gs11 bars) overlaid with the hypothetical difference (gs15 bars) ***

twoway hist high_inc_diff if `apxF_plotsamp0', ///
        frac blcolor(black) legend(off) width(0.025)  bcolor(gs11) blw(*.4) ///
    || hist high_inc_diff_nosorting_adj56 if `apxF_plotsamp0', ///
        frac blcolor(black) title("Selective Conscription") xtitle("Difference") scheme(s1mono) ///
        xtick(-1(0.1)1) xlabel(-1(0.2)1) ytick(0(0.01)0.08) ylabel(0(0.02)0.08) ///
        width(0.025) legend(off) bcolor(gs15) blw(*.4) ///
    || pci 0 0 0.08 0, lcolor(black) lwidth(medthick) lpattern(dash) legend(off)

graph save "graphs/diversity/share_highinc_nosort0_difference04_twoway", replace
graph export "graphs/diversity/share_highinc_nosort0_difference04_twoway.pdf", replace


*** Table ***
set more off
eststo clear

*** Sample: both group shares non-missing, neither group flagged as small,
*** restricted to high_inc_father_04==0
local apxF_matched0 (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 & smallgroup6==0 & smallgroup6_yorc==0 & high_inc_father_04==0 & !missing(group_school_highinc_fath) & !missing(group_yorc_highinc_fath_04)

*** est1: the two hypothetical (no-sorting) variables in a single model ***
eststo: estpost sum high_inc_more_nosorting_adj56 high_inc_diff_nosorting_adj56 if `apxF_matched0', det

*** Only one model is tabulated, so only one column title applies. The .tex
*** call used to pass four titles, which headed the single column "School";
*** both formats now use the same title.
foreach apxF_ext in rtf tex {
    esttab est1 using "output/diversity/sumstats_compare_highinc0_nosort.`apxF_ext'", ///
        cell("count mean (fmt(2)) p50 (fmt(2)) sd (fmt(2)) min max") ///
        mtitles("Selective Conscription") ///
        varwidth(17) modelwidth(5, 7, 5) noobs nonumber nodepvar replace label
}


********************************************************************************
*** for high fathers income ***
*** Everything below is restricted to high_inc_father_04==1 ***
local apxF_matched1 (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 & smallgroup6==0 & smallgroup6_yorc==0 & !missing(group_school_highinc_fath) & !missing(group_yorc_highinc_fath_04) & high_inc_father_04==1
local apxF_plotsamp1 (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 & smallgroup6==0 & high_inc_father_04==1

summarize high_inc_more_nosorting_adj56  if `apxF_matched1'

summarize high_inc_diff_nosorting_adj56  if `apxF_matched1'


*** Difference ***
*** Histogram of the hypothetical difference, dashed vertical line at zero ***


histogram high_inc_diff_nosorting_adj56 if `apxF_plotsamp1', ///
    frac lcolor(black) title("Selective Conscription") xtitle("Difference") scheme(s1mono) ///
    xtick(-1(0.1)1) xlabel(-1(0.2)1) ytick(0(0.01)0.08) ylabel(0(0.02)0.08) ///
    width(0.025) blw(*.4)  bcolor(gs11) blcolor(black)  ///
    addplot(pci 0 0 0.08 0, lcolor(black) lwidth(medthick) lpattern(dash)) legend(off)
graph save "graphs/diversity/share_highinc_nosort1_difference04", replace
graph export "graphs/diversity/share_highinc_nosort1_difference04.pdf", replace

**** Twoway graph ***
*** Observed difference (gs11 bars) overlaid with the hypothetical difference (gs15 bars) ***

twoway hist high_inc_diff if `apxF_plotsamp1', ///
        frac blcolor(black) legend(off) width(0.025)  bcolor(gs11) blw(*.4) ///
    || hist high_inc_diff_nosorting_adj56 if `apxF_plotsamp1', ///
        frac blcolor(black) title("Selective Conscription") xtitle("Difference") scheme(s1mono) ///
        xtick(-1(0.1)1) xlabel(-1(0.2)1) ytick(0(0.01)0.08) ylabel(0(0.02)0.08) ///
        width(0.025) legend(off) bcolor(gs15) blw(*.4) ///
    || pci 0 0 0.08 0, lcolor(black) lwidth(medthick) lpattern(dash) legend(off)

graph save "graphs/diversity/share_highinc_nosort1_difference04_twoway", replace
graph export "graphs/diversity/share_highinc_nosort1_difference04_twoway.pdf", replace


*** Table ***
set more off
eststo clear

*** Sample: both group shares non-missing, neither group flagged as small,
*** restricted to high_inc_father_04==1
local apxF_matched1 (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 & smallgroup6==0 & smallgroup6_yorc==0 & high_inc_father_04==1 & !missing(group_school_highinc_fath) & !missing(group_yorc_highinc_fath_04)

*** est1: the two hypothetical (no-sorting) variables in a single model ***
eststo: estpost sum high_inc_more_nosorting_adj56 high_inc_diff_nosorting_adj56 if `apxF_matched1', det

*** Only one model is tabulated, so only one column title applies. The .tex
*** call used to pass four titles, which headed the single column "School";
*** both formats now use the same title.
foreach apxF_ext in rtf tex {
    esttab est1 using "output/diversity/sumstats_compare_highinc1_nosort.`apxF_ext'", ///
        cell("count mean (fmt(2)) p50 (fmt(2)) sd (fmt(2)) min max") ///
        mtitles("Selective Conscription") ///
        varwidth(17) modelwidth(5, 7, 5) noobs nonumber nodepvar replace label
}

********************************************************************************
********************************************************************************
*** Complementary analysis: SES by father's education level ***
*** Results: Slightly more sorting and less mobility ***
********************************************************************************
********************************************************************************

*** fathers education ***
tab fath_educ_level
*** Indicator: father's education level above 3 (missing level stays missing) ***
gen fathers_high_educ_04=(fath_educ_level>3) if  !missing(fath_educ_level)

** share high educ fathers in groups **
*** Leave-one-out share within group_yorc: group total of the indicator minus
*** the individual's own value, divided by (group_yorc_size - 1).
*** NOTE(review): total() skips missing indicator values and is restricted by
*** the if-condition, while group_yorc_size is defined elsewhere - confirm the
*** denominator counts the same rows as the numerator.

bysort group_yorc: egen group_yorc_higheduc = total(fathers_high_educ_04) if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 
replace group_yorc_higheduc = (group_yorc_higheduc - fathers_high_educ_04)/(group_yorc_size  - 1) if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 

sum group_yorc_higheduc, detail

sum group_yorc_higheduc if fathers_high_educ_04==0, detail
sum group_yorc_higheduc if fathers_high_educ_04==1, detail

*** The x-axis title previously ended in a stray apostrophe that was printed in the figure ***
hist group_yorc_higheduc if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0  & smallgroup6_yorc==0, frac lcolor(black) lpattern("l") xtitle("Leave out mean high father's education") scheme(s1mono) xtick(0(0.05)1) xlabel(0(0.1)1)
graph save "graphs/diversity/hist_group_yorc_fatheduc", replace
graph export "graphs/diversity/hist_group_yorc_fatheduc.pdf", replace

*** Simple Mean ***
*** Group mean of the indicator, own value included. bysort is used so the
*** command does not rely on the data still being sorted by group_yorc.

bysort group_yorc: egen group_yorc_higheduc_mean = mean(fathers_high_educ_04) if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 

sum group_yorc_higheduc_mean, detail

sum group_yorc_higheduc_mean if fathers_high_educ_04==0, detail
sum group_yorc_higheduc_mean if fathers_high_educ_04==1, detail

hist group_yorc_higheduc_mean if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0  & smallgroup6_yorc==0, frac lcolor(black) lpattern("l") xtitle("Mean high father's education") scheme(s1mono) xtick(0(0.05)1) xlabel(0(0.1)1)
graph save "graphs/diversity/hist_group_yorc_fatheduc_mean", replace
graph export "graphs/diversity/hist_group_yorc_fatheduc_mean.pdf", replace

*******************************************************************************
*** Four-category father's education variable built from fath_educ_level:
***   1: level below 3     2: level 3     3: level 4 or 5     4: level above 5
*** Missing fath_educ_level stays missing.
gen fathers_educ_quart_04=1 if (fath_educ_level<3) &  !missing(fath_educ_level)
replace fathers_educ_quart_04=2 if (fath_educ_level==3) &  !missing(fath_educ_level)
*** The second test previously read "fathers_educ==5". That name is either a
*** different variable or, through variable abbreviation, the new variable
*** itself, so level 5 was not reliably assigned to category 3.
replace fathers_educ_quart_04=3 if (fath_educ_level==4 | fath_educ_level==5) &  !missing(fath_educ_level)
replace fathers_educ_quart_04=4 if (fath_educ_level>5) &  !missing(fath_educ_level)

*** Quick look at the category distribution in the military sample ***
hist fathers_educ_quart_04 if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0  & smallgroup6_yorc==0

*** Frequencies, plus one dummy per category (prefix fathers_educ_quart__04_) ***
tab fathers_educ_quart_04 if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0  & smallgroup6_yorc==0, gen(fathers_educ_quart__04_)


*** Bar chart over the four categories ***
*** NOTE(review): saving() writes the .gph to "graphs/" while the export goes to
*** "graphs/diversity/" - confirm the different folder is intended.
set scheme s1mono
graph bar  if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0, over(fathers_educ_quart_04, relabel(1 "Level 1" 2 "Level 2" 3 "Level 3" 4 "Level 4")) bargap(1) ytitle("Percent") title("Selective Conscription") saving("graphs/descriptives_educ_04", replace) 
graph export "graphs/diversity/descriptives_educ_04.pdf", replace

*********************************************************************************
*** Like Table F.1: Regressions on sorting but on education level ***
*********************************************************************************

*** Individual indicator: 1 if fathers_high_educ_04 is 1, 0 if it is 0, missing otherwise
gen maxsort_educ = fathers_high_educ_04 if inlist(fathers_high_educ_04, 0, 1)

*** Own father's education on the school-group share (not stored for the table)
regress maxsort_educ group_school_higheduc ///
    if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 ///
    & smallgroup6==0 & smallgroup6_yorc==0 ///
    & !missing(group_school_higheduc) & !missing(group_yorc_higheduc), ///
    cluster(group_school)


set more off
eststo clear

*** (1) Military-group share on school-group share
eststo: regress group_yorc_higheduc group_school_higheduc ///
    if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 ///
    & smallgroup6==0 & smallgroup6_yorc==0 ///
    & !missing(group_school_higheduc) & !missing(group_yorc_higheduc), ///
    cluster(group_school)
estadd ysumm, mean

*** (2) Military-group share on own father's education
eststo: regress group_yorc_higheduc fathers_high_educ_04 ///
    if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 ///
    & smallgroup6==0 & smallgroup6_yorc==0 ///
    & !missing(group_school_higheduc) & !missing(group_yorc_higheduc), ///
    cluster(group_school)
estadd ysumm, mean

*** (3) Both regressors together
eststo: regress group_yorc_higheduc group_school_higheduc fathers_high_educ_04 ///
    if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 ///
    & smallgroup6==0 & smallgroup6_yorc==0 ///
    & !missing(group_school_higheduc) & !missing(group_yorc_higheduc), ///
    cluster(group_school)
estadd ysumm, mean


*** Export the three stored models, once as rtf and once as tex
esttab est* using "output/diversity/reg_share_high_educ_predict.rtf", ///
    varwidth(17) modelwidth(5, 7, 5) ///
    scalars("ymean Mean" "r2_a Adj. R-squared" "r2 R-squared") ///
    se replace label starlevels(* 0.1 ** 0.05 *** 0.01)

esttab est* using "output/diversity/reg_share_high_educ_predict.tex", ///
    varwidth(17) modelwidth(5, 7, 5) ///
    scalars("ymean Mean" "r2_a Adj. R-squared" "r2 R-squared") ///
    se replace label starlevels(* 0.1 ** 0.05 *** 0.01)

********************************************************************************
*** Checking likelihood of large moves along the SES distribution ***
********************************************************************************

*** Detailed distribution of the military and school shares, full analysis sample
summarize group_yorc_higheduc group_school_higheduc ///
    if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 ///
    & smallgroup6==0 & smallgroup6_yorc==0 ///
    & !missing(group_school_higheduc) & !missing(group_yorc_higheduc), ///
    detail
*** Same but for low SES (fathers_high_educ_04 equal to 0) ***
summarize group_yorc_higheduc group_school_higheduc ///
    if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 ///
    & smallgroup6==0 & smallgroup6_yorc==0 ///
    & !missing(group_school_higheduc) & !missing(group_yorc_higheduc) ///
    & fathers_high_educ_04==0, ///
    detail
*** Same but for high SES (fathers_high_educ_04 equal to 1) ***
summarize group_yorc_higheduc group_school_higheduc ///
    if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 ///
    & smallgroup6==0 & smallgroup6_yorc==0 ///
    & !missing(group_school_higheduc) & !missing(group_yorc_higheduc) ///
    & fathers_high_educ_04==1, ///
    detail


*** Indicator: move from bottom (school) to top (military) in general ***
*** NOTE(review): the cut-offs are hard-coded; presumably they are quartile
*** values read off the summarize output above - confirm and update them if
*** the sample changes. These indicators are not restricted on inrar,
*** insk_date_sif or civserv_inskr.

gen move_1to4_educ = (group_yorc_higheduc>0.5581396 & group_school_higheduc<0.2857143) ///
    if !missing(group_yorc_higheduc) & !missing(group_school_higheduc) ///
    & smallgroup6==0 & smallgroup6_yorc==0

*** Indicator: move from top (school) to bottom (military) in general ***
gen move_4to1_educ = (group_yorc_higheduc<0.3640351 & group_school_higheduc>0.592233) ///
    if !missing(group_yorc_higheduc) & !missing(group_school_higheduc) ///
    & smallgroup6==0 & smallgroup6_yorc==0

*** Quartile membership (xtile) and quartile cut-off values (pctile),
*** first for the military share and then for the school share

xtile SES_military_quartile_educ = group_yorc_higheduc ///
    if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 ///
    & smallgroup6==0 & smallgroup6_yorc==0 ///
    & !missing(group_school_higheduc) & !missing(group_yorc_higheduc), ///
    nquantiles(4)

pctile SES_military_quartile_educ_value = group_yorc_higheduc ///
    if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 ///
    & smallgroup6==0 & smallgroup6_yorc==0 ///
    & !missing(group_school_higheduc) & !missing(group_yorc_higheduc), ///
    nquantiles(4)

xtile SES_school_quartile_educ = group_school_higheduc ///
    if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 ///
    & smallgroup6==0 & smallgroup6_yorc==0 ///
    & !missing(group_school_higheduc) & !missing(group_yorc_higheduc), ///
    nquantiles(4)

pctile SES_school_quartile_educ_value = group_school_higheduc ///
    if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 ///
    & smallgroup6==0 & smallgroup6_yorc==0 ///
    & !missing(group_school_higheduc) & !missing(group_yorc_higheduc), ///
    nquantiles(4)

********************************************************************************
*** Analysis of flows from school quartile to military quartiles ***

*** Transition table for everyone, then by own father's education
tabulate SES_school_quartile_educ SES_military_quartile_educ, row cell

tabulate SES_school_quartile_educ SES_military_quartile_educ if fathers_high_educ_04==0, row cell

tabulate SES_school_quartile_educ SES_military_quartile_educ if fathers_high_educ_04==1, row cell


*** Composition of each school and military quartile by own father's education
tabulate SES_school_quartile_educ fathers_high_educ_04, row cell

tabulate SES_military_quartile_educ fathers_high_educ_04, row cell

*******************************************************************************
*** Sumstats of military groups: now with education variables included ***
*******************************************************************************
*** est1: all observations with smallgroup6_yorc==0
*** est2: all observations
*** est3: one observation per group (group_yorc_tag==1) with smallgroup6_yorc==0
*** est4: one observation per group (group_yorc_tag==1)

set more off
eststo clear

eststo: estpost summarize ///
    group_yorc_size ///
    group_yorc_highinc_fath_04 group_yorc_highinc_fath_04_mean ///
    group_yorc_fath_inc_04_q_1 group_yorc_fath_inc_04_q_2 ///
    group_yorc_fath_inc_04_q_3 group_yorc_fath_inc_04_q_4 ///
    group_yorc_fath_inc_04_q_1_mean group_yorc_fath_inc_04_q_2_mean ///
    group_yorc_fath_inc_04_q_3_mean group_yorc_fath_inc_04_q_4_mean ///
    group_yorc_higheduc ///
    if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 ///
    & smallgroup6_yorc==0 & !missing(group_yorc), ///
    detail

eststo: estpost summarize ///
    group_yorc_size ///
    group_yorc_highinc_fath_04 group_yorc_highinc_fath_04_mean ///
    group_yorc_fath_inc_04_q_1 group_yorc_fath_inc_04_q_2 ///
    group_yorc_fath_inc_04_q_3 group_yorc_fath_inc_04_q_4 ///
    group_yorc_fath_inc_04_q_1_mean group_yorc_fath_inc_04_q_2_mean ///
    group_yorc_fath_inc_04_q_3_mean group_yorc_fath_inc_04_q_4_mean ///
    group_yorc_higheduc ///
    if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 ///
    & !missing(group_yorc), ///
    detail

eststo: estpost summarize ///
    group_yorc_size ///
    group_yorc_highinc_fath_04 group_yorc_highinc_fath_04_mean ///
    group_yorc_fath_inc_04_q_1 group_yorc_fath_inc_04_q_2 ///
    group_yorc_fath_inc_04_q_3 group_yorc_fath_inc_04_q_4 ///
    group_yorc_fath_inc_04_q_1_mean group_yorc_fath_inc_04_q_2_mean ///
    group_yorc_fath_inc_04_q_3_mean group_yorc_fath_inc_04_q_4_mean ///
    group_yorc_higheduc ///
    if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 ///
    & smallgroup6_yorc==0 & group_yorc_tag==1 & !missing(group_yorc), ///
    detail

eststo: estpost summarize ///
    group_yorc_size ///
    group_yorc_highinc_fath_04 group_yorc_highinc_fath_04_mean ///
    group_yorc_fath_inc_04_q_1 group_yorc_fath_inc_04_q_2 ///
    group_yorc_fath_inc_04_q_3 group_yorc_fath_inc_04_q_4 ///
    group_yorc_fath_inc_04_q_1_mean group_yorc_fath_inc_04_q_2_mean ///
    group_yorc_fath_inc_04_q_3_mean group_yorc_fath_inc_04_q_4_mean ///
    group_yorc_higheduc ///
    if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 ///
    & group_yorc_tag==1 & !missing(group_yorc), ///
    detail

*** Observation-level tables (est1, est2)
esttab est1 est2 using "output/diversity/sumstats_diversity_yorc.tex", ///
    cell("count mean (fmt(2)) p50 (fmt(2)) sd (fmt(2)) min max") ///
    varwidth(17) modelwidth(5, 7, 5) noobs nonumber nodepvar ///
    mtitles("All obs., group size>5" "All obs") replace label

esttab est1 est2 using "output/diversity/sumstats_diversity_yorc.rtf", ///
    cell("count mean (fmt(2)) p50 (fmt(2)) sd (fmt(2)) min max") ///
    varwidth(17) modelwidth(5, 7, 5) noobs nonumber nodepvar ///
    mtitles("All obs., group size>5" "All obs") replace label

*** Group-level tables (est3, est4)
esttab est3 est4 using "output/diversity/sumstats_diversity_yorc_unwe.tex", ///
    cell("count mean (fmt(2)) p50 (fmt(2)) sd (fmt(2)) min max") ///
    varwidth(17) modelwidth(5, 7, 5) noobs nonumber nodepvar ///
    mtitles("One obs. per yorc, group size>5" "One obs. per yorc") replace label

esttab est3 est4 using "output/diversity/sumstats_diversity_yorc_unwe.rtf", ///
    cell("count mean (fmt(2)) p50 (fmt(2)) sd (fmt(2)) min max") ///
    varwidth(17) modelwidth(5, 7, 5) noobs nonumber nodepvar ///
    mtitles("One obs. per yorc, group size>5" "One obs. per yorc") replace label

********************************************************************************
********************************************************************************
*** Complementary analysis: Share foreign ***
*** Results: Slightly less sorting and more mobility
********************************************************************************
********************************************************************************

** Share foreign in groups, leave-one-out **
** Step 1: group total of foreign. Step 2: subtract the own value and divide
** by the group size minus one.
by group_yorc: egen group_yorc_foreign = total(foreign) ///
    if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0
replace group_yorc_foreign = (group_yorc_foreign - foreign)/(group_yorc_size - 1) ///
    if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0


summarize group_yorc_foreign, detail

** Checks: number of observations in groups below 6 and number with missing foreign
count if group_yorc_size<6
count if foreign==. & (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0

summarize group_yorc_foreign if foreign==0, detail
summarize group_yorc_foreign if foreign==1, detail

** NOTE(review): the two histograms in this section filter on smallgroup_yorc,
** while other graphs in this file use smallgroup6_yorc - confirm which is meant.
histogram group_yorc_foreign ///
    if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 ///
    & smallgroup_yorc==0, ///
    fraction lcolor(black) lpattern("l") ///
    xtitle("Leave out mean foreign background") ///
    scheme(s1mono) xtick(0(0.05)1) xlabel(0(0.1)1)
graph save "graphs/diversity/hist_group_yorc_foreign", replace
graph export "graphs/diversity/hist_group_yorc_foreign.pdf", replace

*** Simple mean (without leave one out). ****

by group_yorc: egen group_yorc_foreign_mean = mean(foreign) ///
    if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0

summarize group_yorc_foreign_mean, detail

summarize group_yorc_foreign_mean if foreign==0, detail
summarize group_yorc_foreign_mean if foreign==1, detail

histogram group_yorc_foreign_mean ///
    if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 ///
    & smallgroup_yorc==0, ///
    fraction lcolor(black) lpattern("l") ///
    xtitle("Mean foreign background") ///
    scheme(s1mono) xtick(0(0.05)1) xlabel(0(0.1)1)
graph save "graphs/diversity/hist_group_yorc_foreign_mean", replace
graph export "graphs/diversity/hist_group_yorc_foreign_mean.pdf", replace

*********************************************************************************
*** Like Table F.1: Regressions on sorting but by foreign background ***
*********************************************************************************

*** Individual indicator: 1 if foreign is 1, 0 if foreign is 0, missing otherwise
gen  maxsort_for=1 if foreign==1
replace maxsort_for=0 if foreign==0

*** Own foreign background on the school-group share of foreign background.
*** The dependent variable is the foreign indicator created just above (not the
*** education indicator maxsort_educ). This regression is not stored for the table.
reg maxsort_for group_school_foreign  if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 & smallgroup6==0 & smallgroup6_yorc==0 & !missing(group_school_foreign) & !missing(group_yorc_foreign), cluster(group_school)


set more off
eststo clear

*** (1) Military-group share foreign on school-group share foreign
eststo: reg group_yorc_foreign group_school_foreign if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 & smallgroup6==0 & smallgroup6_yorc==0 & !missing(group_school_foreign) & !missing(group_yorc_foreign), cluster(group_school)
estadd ysumm, mean

*** (2) Military-group share foreign on own foreign background
eststo: reg group_yorc_foreign foreign if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 & smallgroup6==0 & smallgroup6_yorc==0 & !missing(group_school_foreign) & !missing(group_yorc_foreign), cluster(group_school)
estadd ysumm, mean

*** (3) Both regressors together
eststo: reg group_yorc_foreign group_school_foreign foreign if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 & smallgroup6==0 & smallgroup6_yorc==0 & !missing(group_school_foreign) & !missing(group_yorc_foreign), cluster(group_school)
estadd ysumm, mean


*** Export the three stored models, once as rtf and once as tex
esttab est* using "output/diversity/reg_share_foreign_predict.rtf",  varwidth(17) modelwidth(5, 7, 5) scalars("ymean Mean" "r2_a Adj. R-squared" "r2 R-squared") se  replace label starlevels(* 0.1 ** 0.05 *** 0.01)

esttab est* using "output/diversity/reg_share_foreign_predict.tex", varwidth(17) modelwidth(5, 7, 5) scalars("ymean Mean" "r2_a Adj. R-squared" "r2 R-squared")  se replace label starlevels(* 0.1 ** 0.05 *** 0.01)

*******************************************************************************
*** Sumstats of military groups: now with foreign included ***
*******************************************************************************
*** est1: all observations with smallgroup6_yorc==0
*** est2: all observations
*** est3: one observation per group (group_yorc_tag==1) with smallgroup6_yorc==0
*** est4: one observation per group (group_yorc_tag==1)
*** NOTE(review): these exports use the same file names as the education
*** sumstats tables written earlier in this file, so they replace those files.

set more off
eststo clear

eststo: estpost summarize ///
    group_yorc_size ///
    group_yorc_highinc_fath_04 group_yorc_highinc_fath_04_mean ///
    group_yorc_fath_inc_04_q_1 group_yorc_fath_inc_04_q_2 ///
    group_yorc_fath_inc_04_q_3 group_yorc_fath_inc_04_q_4 ///
    group_yorc_fath_inc_04_q_1_mean group_yorc_fath_inc_04_q_2_mean ///
    group_yorc_fath_inc_04_q_3_mean group_yorc_fath_inc_04_q_4_mean ///
    group_yorc_higheduc group_yorc_foreign foreign ///
    if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 ///
    & smallgroup6_yorc==0 & !missing(group_yorc), ///
    detail

eststo: estpost summarize ///
    group_yorc_size ///
    group_yorc_highinc_fath_04 group_yorc_highinc_fath_04_mean ///
    group_yorc_fath_inc_04_q_1 group_yorc_fath_inc_04_q_2 ///
    group_yorc_fath_inc_04_q_3 group_yorc_fath_inc_04_q_4 ///
    group_yorc_fath_inc_04_q_1_mean group_yorc_fath_inc_04_q_2_mean ///
    group_yorc_fath_inc_04_q_3_mean group_yorc_fath_inc_04_q_4_mean ///
    group_yorc_higheduc group_yorc_foreign foreign ///
    if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 ///
    & !missing(group_yorc), ///
    detail

eststo: estpost summarize ///
    group_yorc_size ///
    group_yorc_highinc_fath_04 group_yorc_highinc_fath_04_mean ///
    group_yorc_fath_inc_04_q_1 group_yorc_fath_inc_04_q_2 ///
    group_yorc_fath_inc_04_q_3 group_yorc_fath_inc_04_q_4 ///
    group_yorc_fath_inc_04_q_1_mean group_yorc_fath_inc_04_q_2_mean ///
    group_yorc_fath_inc_04_q_3_mean group_yorc_fath_inc_04_q_4_mean ///
    group_yorc_higheduc group_yorc_foreign foreign ///
    if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 ///
    & smallgroup6_yorc==0 & group_yorc_tag==1 & !missing(group_yorc), ///
    detail

eststo: estpost summarize ///
    group_yorc_size ///
    group_yorc_highinc_fath_04 group_yorc_highinc_fath_04_mean ///
    group_yorc_fath_inc_04_q_1 group_yorc_fath_inc_04_q_2 ///
    group_yorc_fath_inc_04_q_3 group_yorc_fath_inc_04_q_4 ///
    group_yorc_fath_inc_04_q_1_mean group_yorc_fath_inc_04_q_2_mean ///
    group_yorc_fath_inc_04_q_3_mean group_yorc_fath_inc_04_q_4_mean ///
    group_yorc_higheduc group_yorc_foreign foreign ///
    if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 ///
    & group_yorc_tag==1 & !missing(group_yorc), ///
    detail

*** Observation-level tables (est1, est2)
esttab est1 est2 using "output/diversity/sumstats_diversity_yorc.tex", ///
    cell("count mean (fmt(2)) p50 (fmt(2)) sd (fmt(2)) min max") ///
    varwidth(17) modelwidth(5, 7, 5) noobs nonumber nodepvar ///
    mtitles("All obs., group size>5" "All obs") replace label

esttab est1 est2 using "output/diversity/sumstats_diversity_yorc.rtf", ///
    cell("count mean (fmt(2)) p50 (fmt(2)) sd (fmt(2)) min max") ///
    varwidth(17) modelwidth(5, 7, 5) noobs nonumber nodepvar ///
    mtitles("All obs., group size>5" "All obs") replace label

*** Group-level tables (est3, est4)
esttab est3 est4 using "output/diversity/sumstats_diversity_yorc_unwe.tex", ///
    cell("count mean (fmt(2)) p50 (fmt(2)) sd (fmt(2)) min max") ///
    varwidth(17) modelwidth(5, 7, 5) noobs nonumber nodepvar ///
    mtitles("One obs. per yorc, group size>5" "One obs. per yorc") replace label

esttab est3 est4 using "output/diversity/sumstats_diversity_yorc_unwe.rtf", ///
    cell("count mean (fmt(2)) p50 (fmt(2)) sd (fmt(2)) min max") ///
    varwidth(17) modelwidth(5, 7, 5) noobs nonumber nodepvar ///
    mtitles("One obs. per yorc, group size>5" "One obs. per yorc") replace label

*******************************************************************************
*** Share foreign background ***
********************************************************************************

*** foreign_more: 1 if the military share exceeds the school share, else 0
*** foreign_diff: military share minus school share
*** Both are missing when either share is missing.
gen foreign_more=(group_yorc_foreign>group_school_foreign) if !missing(group_yorc_foreign) & !missing(group_school_foreign)

gen foreign_diff=(group_yorc_foreign-group_school_foreign) if !missing(group_yorc_foreign) & !missing(group_school_foreign)


*** Labels used as row names in the exported tables (esttab ..., label)
label variable group_yorc_foreign "Share peers with foreign background, military"

label variable group_school_foreign "Share peers with foreign background, school"

label variable foreign_diff "Mean difference in peers with foreign background (military-school)"

label variable foreign_more "Share with difference > 0"
*** military ***


hist group_yorc_foreign ///
    if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 & smallgroup6_yorc==0, ///
    frac lcolor(black) xtitle("Military") scheme(s1mono) ///
    xtick(0(0.05)1) xlabel(0(0.1)1) ytick(0(0.01)0.07) ylabel(0(0.02)0.07) width(0.01)
graph save "graphs/diversity/share_foreign_military04", replace
graph export "graphs/diversity/share_foreign_military04.pdf", replace

*** School ***
hist group_school_foreign ///
    if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 & smallgroup6==0, ///
    frac lcolor(black) xtitle("School") scheme(s1mono) ///
    xtick(0(0.05)1) xlabel(0(0.1)1) ytick(0(0.01)0.07) ylabel(0(0.02)0.07) width(0.01)
graph save "graphs/diversity/share_foreign_school04", replace
graph export "graphs/diversity/share_foreign_school04.pdf", replace

*** Difference ***
*** NOTE(review): the three difference histograms filter on smallgroup6 only,
*** whereas the summary tables of foreign_diff later in this file also require
*** smallgroup6_yorc==0 - confirm whether the graphs should use both filters.

hist foreign_diff ///
    if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 & smallgroup6==0, ///
    frac lcolor(black) xtitle("Difference") scheme(s1mono) ///
    xtick(-1(0.1)1) xlabel(-1(0.2)1) ytick(0(0.01)0.07) ylabel(0(0.02)0.07) width(0.025)
graph save "graphs/diversity/share_foreign_difference04", replace
graph export "graphs/diversity/share_foreign_difference04.pdf", replace

*** Difference, individuals with foreign==0 (reference line at zero)
hist foreign_diff ///
    if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 & smallgroup6==0 & foreign==0, ///
    frac lcolor(black) xtitle("Difference") scheme(s1mono) ///
    xtick(-1(0.1)1) xlabel(-1(0.2)1) ytick(0(0.01)0.07) ylabel(0(0.02)0.07) width(0.025)  xline(0)
graph save "graphs/diversity/share_foreign0_difference04", replace
graph export "graphs/diversity/share_foreign0_difference04.pdf", replace

*** Difference, individuals with foreign==1 (reference line at zero)
hist foreign_diff ///
    if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 & smallgroup6==0 & foreign==1, ///
    frac lcolor(black) xtitle("Difference") scheme(s1mono) ///
    xtick(-1(0.1)1) xlabel(-1(0.2)1) ytick(0(0.01)0.07) ylabel(0(0.02)0.07) width(0.025) xline(0)
graph save "graphs/diversity/share_foreign1_difference04", replace
graph export "graphs/diversity/share_foreign1_difference04.pdf", replace

**** Table ****
*** Each table below holds a single set of stored results (est1), so mtitles()
*** takes exactly one column title; the rtf and tex versions use the same one.
*** Row names come from the variable labels (label option).
set more off
eststo clear

eststo: estpost sum group_school_foreign group_yorc_foreign  foreign_diff foreign_more if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 & smallgroup6==0 & smallgroup6_yorc==0 & !missing(group_school_foreign) & !missing(group_yorc_foreign), detail

esttab est1 using "output/diversity/sumstats_compare_foreign.rtf", cell("count mean (fmt(2)) p50 (fmt(2)) sd (fmt(2)) min max")  mtitles("Selective Conscription") varwidth(17) modelwidth(5, 7, 5) noobs nonumber nodepvar replace label

esttab est1  using "output/diversity/sumstats_compare_foreign.tex", cell("count mean (fmt(2)) p50 (fmt(2)) sd (fmt(2)) min max") mtitles("Selective Conscription") varwidth(17) modelwidth(5, 7, 5) noobs nonumber nodepvar replace label


*** For those with a foreign background ***

set more off
eststo clear

eststo: estpost sum group_school_foreign group_yorc_foreign  foreign_diff foreign_more if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 & smallgroup6==0 & smallgroup6_yorc==0 & !missing(group_school_foreign) & !missing(group_yorc_foreign) & foreign==1, detail

esttab est1 using "output/diversity/sumstats_compare_foreign1.rtf", cell("count mean (fmt(2)) p50 (fmt(2)) sd (fmt(2)) min max")  mtitles("Selective Conscription") varwidth(17) modelwidth(5, 7, 5) noobs nonumber nodepvar replace label

esttab est1  using "output/diversity/sumstats_compare_foreign1.tex", cell("count mean (fmt(2)) p50 (fmt(2)) sd (fmt(2)) min max") mtitles("Selective Conscription") varwidth(17) modelwidth(5, 7, 5) noobs nonumber nodepvar replace label

*** For those with a non-foreign background ***

set more off
eststo clear

eststo: estpost sum group_school_foreign group_yorc_foreign  foreign_diff foreign_more if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 & smallgroup6==0 & smallgroup6_yorc==0 & !missing(group_school_foreign) & !missing(group_yorc_foreign) & foreign==0, detail

esttab est1 using "output/diversity/sumstats_compare_foreign0.rtf", cell("count mean (fmt(2)) p50 (fmt(2)) sd (fmt(2)) min max")  mtitles("Selective Conscription") varwidth(17) modelwidth(5, 7, 5) noobs nonumber nodepvar replace label

esttab est1  using "output/diversity/sumstats_compare_foreign0.tex", cell("count mean (fmt(2)) p50 (fmt(2)) sd (fmt(2)) min max") mtitles("Selective Conscription") varwidth(17) modelwidth(5, 7, 5) noobs nonumber nodepvar replace label

********************************************************************************
*** Checking likelihood of large moves along the distribution of share foreign *
********************************************************************************

*** Detailed distribution of the military and school shares, full analysis sample
summarize group_yorc_foreign group_school_foreign ///
    if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 ///
    & smallgroup6==0 & smallgroup6_yorc==0 ///
    & !missing(group_school_foreign) & !missing(group_yorc_foreign), ///
    detail
*** Same but for those with a non-foreign background (foreign equal to 0) ***
summarize group_yorc_foreign group_school_foreign ///
    if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 ///
    & smallgroup6==0 & smallgroup6_yorc==0 ///
    & !missing(group_school_foreign) & !missing(group_yorc_foreign) ///
    & foreign==0, ///
    detail
*** Same but for those with a foreign background (foreign equal to 1) ***
summarize group_yorc_foreign group_school_foreign ///
    if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 ///
    & smallgroup6==0 & smallgroup6_yorc==0 ///
    & !missing(group_school_foreign) & !missing(group_yorc_foreign) ///
    & foreign==1, ///
    detail


*** Indicator: move from bottom (school) to top (military) in general ***
*** NOTE(review): the cut-offs are hard-coded; presumably they are quartile
*** values read off the summarize output above - confirm and update them if
*** the sample changes. These indicators are not restricted on inrar,
*** insk_date_sif or civserv_inskr.

gen move_1to4_foreign = (group_yorc_foreign>0.1281227 & group_school_foreign<0.05) ///
    if !missing(group_yorc_foreign) & !missing(group_school_foreign) ///
    & smallgroup6==0 & smallgroup6_yorc==0

*** Indicator: move from top (school) to bottom (military) in general ***
gen move_4to1_foreign = (group_yorc_foreign<0.0518519 & group_school_foreign>0.1733333) ///
    if !missing(group_yorc_foreign) & !missing(group_school_foreign) ///
    & smallgroup6==0 & smallgroup6_yorc==0

*** Quartile membership (xtile) and quartile cut-off values (pctile),
*** first for the military share and then for the school share

xtile SES_military_quartile_foreign = group_yorc_foreign ///
    if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 ///
    & smallgroup6==0 & smallgroup6_yorc==0 ///
    & !missing(group_school_foreign) & !missing(group_yorc_foreign), ///
    nquantiles(4)

pctile SES_military_quartile_for_value = group_yorc_foreign ///
    if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 ///
    & smallgroup6==0 & smallgroup6_yorc==0 ///
    & !missing(group_school_foreign) & !missing(group_yorc_foreign), ///
    nquantiles(4)

xtile SES_school_quartile_foreign = group_school_foreign ///
    if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 ///
    & smallgroup6==0 & smallgroup6_yorc==0 ///
    & !missing(group_school_foreign) & !missing(group_yorc_foreign), ///
    nquantiles(4)

pctile SES_school_quartile_for_value = group_school_foreign ///
    if (inrar > 2001 & insk_date_sif < td(23sep2004)) & inrar !=. & civserv_inskr==0 ///
    & smallgroup6==0 & smallgroup6_yorc==0 ///
    & !missing(group_school_foreign) & !missing(group_yorc_foreign), ///
    nquantiles(4)

********************************************************************************
*** Analysis of flows from school quartile to military quartiles ***

*** Transition table for everyone, then by own foreign background
tabulate SES_school_quartile_foreign SES_military_quartile_foreign, row cell

tabulate SES_school_quartile_foreign SES_military_quartile_foreign if foreign==0, row cell

tabulate SES_school_quartile_foreign SES_military_quartile_foreign if foreign==1, row cell


*** Composition of each school and military quartile by own foreign background
tabulate SES_school_quartile_foreign foreign, row cell

tabulate SES_military_quartile_foreign foreign, row cell
